############################################################################
# Bypassing the Enemy (2017)
# This file contains the replication code for the paper 
# "Bypassing the Enemy: Distributive Politics, Credit Claiming, and Nonstate Organizations in Brazil"

########### Getting data

########### Recoding variables

############ Tables and Figures

############################################################################

rm(list = ls(all = TRUE))

#Preambule
#R version 3.3.2

#Change to the appropiate directory
dir <- ""

options(scipen=999) # supressing scientific notation
par(mar=c(5.1,4.1,4.1,2.1)) 
par(mfrow=c(1,1))

#Installing packages

#Updates to version 2

#Use the appropriate package versions (by either installing them manually or checkpoint package)
#Producing pdf figures instead of publication eps because of package conflicts (when using checkpoint)
#If wish to produce publication-quality figures, use the Cairo package and cairo_ps instead of pdf


#If you're getting the warning below, it's because you're using
#a R version other than 3.3.2
#Warning messages:
#  1: In (X - c)/h :
#  Recycling array of length 1 in vector-array arithmetic is deprecated.
#Use c() or as.vector() instead.

#You may need to setwd(dir) to get checkpoint to work properly

#setwd(dir)
library(checkpoint)
checkpoint("2017-03-30", checkpointLocation = tempdir())

#Libraries used
library(sandwich) 
library(xtable)
library(ri)
library(rdrobust)
library(Hmisc)
library(lmtest)
library(cjoint)
library(tidyverse)

#Functions
source(paste0(dir, "2.bypassing_functions_final.R"))

############################################################################
#Getting main data

load(paste0(dir, "final_replication.RData"))

############################################################################
#Creating variables used in the analysis

#Windows (used in plots and tables)
H <- c(seq(from = .005, to = .0095, by = .0005), seq(from = .01, to = .05, by = .001))

#Clusters
data_200315$ibge2 <- as.numeric(data_200315$ibge)
data_200715$ibge2 <- as.numeric(data_200715$ibge)

#Nonzero for outcome variable
data_200315$nonzero <- ifelse(data_200315$dtransfers.pc==0, 0, 1)
data_200315$nonzero.mayors <- ifelse(data_200315$dtransfers.pc.mayors==0, 0, 1)
data_200715$nonzero <- ifelse(data_200715$dtransfers.pc==0, 0, 1)
data_200715$nonzero.mayors <- ifelse(data_200715$dtransfers.pc.mayors==0, 0, 1)

#Recoding running variable
data_200315$vote_margin_share2 <- data_200315$vote_margin_share*100
data_200715$vote_margin_share2 <- data_200715$vote_margin_share*100

#Recoding for per capita or vote share

data_200315$p_13.1 <- data_200315$p_13/data_200315$p_val
data_200315$p_45.1 <- data_200315$p_45/data_200315$p_val
data_200315$f.nom_13.1 <- data_200315$f.nom_13/data_200315$f.nom_total
data_200315$f.nom_45.1 <- data_200315$f.nom_45/data_200315$f.nom_total
data_200315$e.nom_13.1 <- data_200315$e.nom_13/data_200315$e.nom_total
data_200315$e.nom_45.1 <- data_200315$e.nom_45/data_200315$e.nom_total
data_200315$g_13.1 <- data_200315$g_13/data_200315$g_val
data_200315$g_45.1 <- data_200315$g_45/data_200315$g_val
data_200315$Comparecimento1t.1 <- data_200315$Comparecimento1t/data_200315$Aptos

# Create vectors for tests

X2000_incomepercapita <- NA
X2000_doctor <- NA
X2000_idh_education <- NA
X2000_idh_income <- NA
X2000_idh_longevity <- NA
X2000_illiterate <- NA
X2000_infant <- NA
X2000_pop <- NA
X2000_poverty <- NA
p_13.1 <- NA
p_45.1 <- NA
f.nom_13.1 <- NA
f.nom_45.1 <- NA
e.nom_13.1 <- NA
e.nom_45.1 <- NA
g_13.1 <- NA
g_45.1 <- NA
Comparecimento1t.1 <- NA
age <- NA
sex <- NA
ftest.h <- NA

#Renaming variables
names(X2000_incomepercapita) <- "Income per capita"
names(X2000_doctor) <- "Doctors per thousand pop."
names(X2000_idh_education) <- "Education (IDH)"
names(X2000_idh_income) <- "Income (IDH)"
names(X2000_idh_longevity) <- "Longevity (IDH)"
names(X2000_illiterate) <- "Illiteracy rate"
names(X2000_infant) <- "Infant mortality"
names(X2000_pop)  <- "Population"
names(X2000_poverty) <- "Poverty rate"
names(p_13.1) <- "Vote for Lula in 1998"
names(p_45.1) <- "Vote for FHC in 1998"
names(f.nom_13.1) <- "Vote for PT (fed. dep.) in 1998"
names(f.nom_45.1) <- "Votes for PSDB (fed. dep.) in 1998"
names(g_13.1) <- "Vote for PT (governor) in 1998"
names(g_45.1) <- "Vote for PSDB (governor) in 1998"
names(e.nom_13.1) <- "Votes for PT (state dep.) in 1998"
names(e.nom_45.1) <- "Votes for PSBD (state dep.) in 1998"
names(Comparecimento1t.1) <- "Turnout in 1998"
names(sex) <- "Mayor's Sex"
names(age) <- "Mayor's Age"
names(ftest.h) <-  "F-test"

#Creating Standardized Variables
data_200315$X2000_incomepercapita.2 <- (data_200315$X2000_incomepercapita - mean(data_200315$X2000_incomepercapita, na.rm=T))/sd(data_200315$X2000_incomepercapita, na.rm=T)
data_200315$X2000_doctor.2 <- (data_200315$X2000_doctor- mean(data_200315$X2000_doctor, na.rm=T))/sd(data_200315$X2000_doctor, na.rm=T)
data_200315$X2000_idh_education.2 <- (data_200315$X2000_idh_education - mean(data_200315$X2000_idh_education, na.rm=T))/sd(data_200315$X2000_idh_education, na.rm=T)
data_200315$X2000_idh_income.2 <- (data_200315$X2000_idh_income - mean(data_200315$X2000_idh_income, na.rm=T))/sd(data_200315$X2000_idh_income, na.rm=T)
data_200315$X2000_idh_longevity.2 <- (data_200315$X2000_idh_longevity - mean(data_200315$X2000_idh_longevity, na.rm=T))/sd(data_200315$X2000_idh_longevity, na.rm=T)
data_200315$X2000_illiterate.2 <- (data_200315$X2000_illiterate - mean(data_200315$X2000_illiterate, na.rm=T))/sd(data_200315$X2000_illiterate, na.rm=T)
data_200315$X2000_infant.2 <- (data_200315$X2000_infant - mean(data_200315$X2000_infant, na.rm=T))/sd(data_200315$X2000_infant, na.rm=T)
data_200315$X2000_pop.2 <- (data_200315$X2000_pop - mean(data_200315$X2000_pop, na.rm=T))/sd(data_200315$X2000_pop, na.rm=T)
data_200315$X2000_poverty.2 <- (data_200315$X2000_poverty - mean(data_200315$X2000_poverty, na.rm=T))/sd(data_200315$X2000_poverty, na.rm=T)
data_200315$p_13.2 <- (data_200315$p_13.1 - mean(data_200315$p_13.1, na.rm=T))/sd(data_200315$p_13.1, na.rm=T)
data_200315$p_45.2 <- (data_200315$p_45.1 - mean(data_200315$p_45.1, na.rm=T))/sd(data_200315$p_45.1, na.rm=T)
data_200315$f.nom_13.2 <- (data_200315$f.nom_13.1 - mean(data_200315$f.nom_13.1, na.rm=T))/sd(data_200315$f.nom_13.1, na.rm=T) 
data_200315$f.nom_45.2 <- (data_200315$f.nom_45.1 - mean(data_200315$f.nom_45.1, na.rm=T))/sd(data_200315$f.nom_45.1, na.rm=T)
data_200315$g_13.2 <- (data_200315$g_13.1 - mean(data_200315$g_13.1, na.rm=T))/sd(data_200315$g_13.1, na.rm=T) 
data_200315$g_45.2 <- (data_200315$g_45.1 - mean(data_200315$g_45.1, na.rm=T))/sd(data_200315$g_45.1, na.rm=T)
data_200315$e.nom_13.2 <- (data_200315$e.nom_13.1 - mean(data_200315$e.nom_13.1, na.rm=T))/sd(data_200315$e.nom_13.1, na.rm=T)
data_200315$e.nom_45.2 <- (data_200315$e.nom_45.1 - mean(data_200315$e.nom_45.1, na.rm=T))/sd(data_200315$e.nom_45.1, na.rm=T)
data_200315$Comparecimento1t.2 <- (data_200315$Comparecimento1t.1 - mean(data_200315$Comparecimento1t.1, na.rm=T))/sd(data_200315$Comparecimento1t.1, na.rm=T)
data_200315$sex.2 <- (data_200315$sex - mean(data_200315$sex, na.rm=T))/sd(data_200315$sex, na.rm=T)
data_200315$age.2 <- (data_200315$age - mean(data_200315$age, na.rm=T))/sd(data_200315$age, na.rm=T)

#Pre-treatment covariates
#List of names
outcomes <-  list(
  data_200315$X2000_incomepercapita.2,
  data_200315$X2000_doctor.2,
  data_200315$X2000_idh_education.2,
  data_200315$X2000_idh_income.2,
  data_200315$X2000_idh_longevity.2,
  data_200315$X2000_illiterate.2,
  data_200315$X2000_infant.2,
  data_200315$X2000_pop.2, 
  data_200315$X2000_poverty.2,
  data_200315$p_13.2, 
  data_200315$p_45.2,
  data_200315$f.nom_13.2, 
  data_200315$f.nom_45.2,
  data_200315$g_13.2, 
  data_200315$g_45.2,
  data_200315$e.nom_13.2,
  data_200315$e.nom_45.2,
  data_200315$Comparecimento1t.2,
  data_200315$sex.2, 
  data_200315$age.2)

names <- c("Income per capita", 
           "Doctors per thousand pop.",
           "Education (IDH)",
           "Income (IDH)", 
           "Longevity (IDH)", 
           "Illiteracy rate", 
           "Infant mortality", 
           "Population",
           "Poverty rate", 
           "Vote for Lula",
           "Vote for FHC", 
           "Vote for PT (fed. dep.)",
           "Votes for PSDB (fed. dep.)",
           "Vote for PT (governor)",
           "Vote for PSDB (governor)",
           "Votes for PT (state dep.)",
           "Votes for PSBD (state dep.)",
           "Turnout", 
           "Mayor's Sex",
           "Mayor's Age")

##########################################################################
#TABLE 1.

table.p <- cbind(aggregate(dtransfers.pc~treat, data_200315, mean), 
                aggregate(dtransfers.pc.mayors~treat, data_200315, mean))

table.c <- cbind(aggregate(dtransfers.pc~treat, data_200315align, mean), 
                 aggregate(dtransfers.pc.mayors~treat, data_200315align, mean))

table1 <- bind_rows(table.p, table.c)
print(xtable(table1[c(2,1,4,3), ], digits=1, align="cccc", include.rownames=F,
       caption="Partisan Alignment and Transfers, in reais per capita (2003-2015)"), caption.placement = "top")


############################################################################
# FIGURE 1. 

# Remove NAs for randomization inference (missing data on about 23 observations 
# in pre-treatment covariates)
data_200315v2 <- data_200315
data_200315v3 <- data_200315v2 %>% filter(!is.na(X2000_incomepercapita.2))
cat(nrow(data_200315v2) - nrow(data_200315v3), "cases missing: demographics") #change this back to v3
data_200315v4 <- data_200315v3 %>% filter(!is.na(age.2))
cat(nrow(data_200315v3) - nrow(data_200315v4), "cases missing: age")
rm(data_200315v2, data_200315v3)

#Getting pre-treatment outcomes
outcomes2 <-  list(
  data_200315v4$X2000_incomepercapita.2,
  data_200315v4$X2000_doctor.2,
  data_200315v4$X2000_idh_education.2,
  data_200315v4$X2000_idh_income.2,
  data_200315v4$X2000_idh_longevity.2,
  data_200315v4$X2000_illiterate.2,
  data_200315v4$X2000_infant.2,
  data_200315v4$X2000_pop.2, 
  data_200315v4$X2000_poverty.2,
  data_200315v4$p_13.2, 
  data_200315v4$p_45.2,
  data_200315v4$f.nom_13.2, 
  data_200315v4$f.nom_45.2,
  data_200315v4$g_13.2, 
  data_200315v4$g_45.2,
  data_200315v4$e.nom_13.2,
  data_200315v4$e.nom_45.2,
  data_200315v4$Comparecimento1t.2,
  data_200315v4$sex.2,
  data_200315v4$age.2)

names <- c("Income per capita", 
           "Doctors per thousand pop.",
           "Education (IDH)",
           "Income (IDH)", 
           "Longevity (IDH)", 
           "Illiteracy rate", 
           "Infant mortality", 
           "Population",
           "Poverty rate", 
           "Vote for Lula",
           "Vote for FHC", 
           "Vote for PT (fed. dep.)",
           "Votes for PSDB (fed. dep.)",
           "Vote for PT (governor)",
           "Vote for PSDB (governor)",
           "Votes for PT (state dep.)",
           "Votes for PSBD (state dep.)",
           "Turnout", 
           "Mayor's Sex",
           "Mayor's Age")

#This runs slowly.
balance_table <- rdd.table.balance(runvar = data_200315v4$vote_margin_share, 
                                   treat = data_200315v4$treat,
                                   outcome = outcomes2, title=names) 

balance_table <- do.call(rbind, balance_table)

pvalues <- balance_table[seq(3, 79, by=4),]
rownames(pvalues) <- names

bw <- c(0.005, 0.01, 0.02, 0.03, 0.04, 0.05)

pdf(paste0(dir, "paper_drafts/main/fig1.pdf"),
    width=20, height=20)
par(mfrow=c(4,5), oma=c(0,3,3, 0))
for (i in 1:20){
  pv.plot(bw, pvalues[i,], names=names[i], cexpoint=5) 
}
dev.off()

############################################################################
#FIGURE 2. 

#FIGURE 2 (a)
pdf(paste0(dir, "paper_drafts/main/fig2a.pdf"), 
    width=8, height=8)
par(mar=c(5, 8, 2.5, 0.5))
rd.plot(x = data_200315$vote_margin_share2, 
        numbinl = 100, numbinr = 100,
        y = data_200315$dtransfers.pc, y.lim = c(0, 30), x.lim= c(-10, 10),
        x.label = "President's Party Vote Margin (%)", 
        y.label = "Transfers to Non-State Providers \n (reais per capita)",
        title = "", N=FALSE)
dev.off()

#FIGURE 2 (b)
pdf(paste0(dir, "paper_drafts/main/fig2b.pdf"), 
    width=8, height=8)
par(mar=c(5, 8, 2.5, 0.5))
rd.plot(x = data_200315$vote_margin_share2,
        numbinl = 150, numbinr = 150,
        y = data_200315$dtransfers.pc.mayors, y.lim = c(110, 150), x.lim=c(-10, 10),
        x.label = "President's Party Vote Margin (%)", 
        y.label = "Transfers to Mayors \n (reais per capita)", 
        title="", N=FALSE)
dev.off()

############################################################################
#FIGURE 3.

#Figure 3 (a) NSP
pdf(paste0(dir, "paper_drafts/main/fig3a.pdf"),
    width=8, height=8)
par(mar=c(5, 4, 2.5, 0.5))
RD_plot(running = data_200315$vote_margin_share2, treat = data_200315$treat, 
        outcome = data_200315$dtransfers.pc, clusterT = T, cluster.var = data_200315$ibge2,
        cutoff = 0, min_running=0.2, max_running=5,
        opt_bw = F, nr_windows = 20, main_est = "dom", ci = "90%",
        add_est = F, nr_obs = T, 
        nr_obs_lab=c(10, 30, 100, 250, 500, 750, 800),
        label_x= "Vote margin (%)\n Absolute Distance from Cutoff",
        label_y= "Average Difference in NSP Transfers \n Between Aligned and Unaligned",
        plot_label= "",
        legend= T)
dev.off()

#Figure 3 (b)
pdf(paste0(dir, "paper_drafts/main/fig3b.pdf"),
    width=8, height=8)
par(mar=c(5, 4, 2.5, 0.5))
RD_plot(running = data_200315$vote_margin_share2, treat = data_200315$treat, 
        outcome = data_200315$dtransfers.pc.mayors, clusterT = T, 
        cluster.var = data_200315$ibge2,
        cutoff = 0, min_running = 0.2, max_running = 5,
        opt_bw = F, nr_windows = 20, main_est = "dom", ci = "90%",
        add_est = F, nr_obs = T, 
        nr_obs_lab=c(10, 30, 100, 250, 500, 750, 800),
        label_x= "Vote margin (%)\n Absolute Distance from Cutoff",
        label_y= "Average Difference in Mayoral Transfers \n Between Aligned and Unaligned",
        plot_label="",
        legend=T)
dev.off()

############################################################################
# TABLE 2. 

#Optimal bandwidths
nsp_cctbw <- rdbwselect(y = data_200315$dtransfers.pc,  
							          x= data_200315$vote_margin_share,
                        all = TRUE, kernel = "uniform", p = 1, q = 2)
                        
mayors_cctbw <- rdbwselect(y = data_200315$dtransfers.pc.mayors, 
								          x = data_200315$vote_margin_share,
                          all = TRUE, kernel = "tri", p = 1, q = 2)

opt.bw.nsp <- optimal.bw(Y = data_200315$dtransfers.pc, 
                         X = data_200315$vote_margin_share, 
                         c = 0, reg = T)

opt.bw.mayors <- optimal.bw(Y = data_200315$dtransfers.pc.mayors, 
                            X = data_200315$vote_margin_share, 
                            c = 0, reg = T)

#NSP
rdd.table(runvar = data_200315$vote_margin_share, treat = data_200315$treat,
          outcome.1 = data_200315$dtransfers.pc, clusterT = T, 
          cluster.var = data_200315$ibge2,
          non.zero = data_200315$nonzero, p = 1, q = 2, kernel = "uniform",
          opt.pc = opt.bw.nsp)

#Mayors
rdd.table(runvar = data_200315$vote_margin_share, treat= data_200315$treat,
          outcome.1 = data_200315$dtransfers.pc.mayors, 
          clusterT = T, cluster.var=data_200315$ibge2,
          non.zero = data_200315$nonzero.mayors, p = 1, q = 2, kernel = "tri",
          opt.pc = opt.bw.mayors)

#####################################################################
# TABLE 3.

#Optimal bandwidths
nsp_cctbw.s <- rdbwselect(y = data_200715$dtransfers.pc,  
							  x = data_200715$vote_margin_share,
                          all = TRUE, kernel = "uniform", p=  1, q = 2)
mayors_cctbw.s <- rdbwselect(y = data_200715$dtransfers.pc.mayors, 
								  x = data_200715$vote_margin_share,
                             all = TRUE, kernel = "tri", p = 1, q = 2)

opt.bw.nsp.s <- optimal.bw(Y = data_200715$dtransfers.pc, 
                           X = data_200715$vote_margin_share, 
                           c = 0, reg = T)

opt.bw.mayors.s <- optimal.bw(Y = data_200715$dtransfers.pc.mayors, 
                              X = data_200715$vote_margin_share, 
                              c = 0, reg = T)

#NSP
rdd.table(runvar = data_200715$vote_margin_share, treat = data_200715$treat,
          outcome.1  = data_200715$dtransfers.pc, clusterT = T, 
          cluster.var = data_200715$ibge2,
          non.zero = data_200715$nonzero, p = 1, q = 2, kernel = "uniform",
          opt.pc = opt.bw.nsp.s)

#Mayors
rdd.table(runvar = data_200715$vote_margin_share, treat = data_200715$treat,
          outcome.1 = data_200715$dtransfers.pc.mayors, clusterT = T, 
          cluster.var = data_200715$ibge2,
          non.zero = data_200715$nonzero.mayors, p = 1, q = 2, kernel="uniform",
          opt.pc = opt.bw.mayors.s)

##################################################################### 
# FIGURE 4.

#Recoding for analysis
conjointff$nsp_cityf <- as.factor(conjointff$nsp_cityf)
conjointff$federalf <- as.factor(conjointff$federalf)

#Analysis by aligned and unaligned
conjointff.psdb <- conjointff %>% filter(partisanship == "O prefeito pertence ao PSDB (número 45).")
conjointff.pt <- conjointff %>% filter(partisanship == "O prefeito pertence ao  PT (número 13).")
conjointff.psdb$nsp_cityf <- relevel(conjointff.psdb$nsp_cityf, ref = "A PREFEITURA é responsável pelo abrigo para moradores de rua.")
conjointff.pt$nsp_cityf <- relevel(conjointff.pt$nsp_cityf, ref = "A PREFEITURA é responsável pelo abrigo para moradores de rua.")


results.choice.psdb <- amce(d_choice ~ nsp_cityf + federalf,
                            data = conjointff.psdb,
                            cluster = TRUE, respondent.id = "respID")
results.choice.pt <- amce(d_choice ~ nsp_cityf + federalf,
                          data = conjointff.pt,
                          cluster = TRUE, respondent.id = "respID")

#Figure 4 (a)
pdf(paste0(dir, "paper_drafts/main/fig4a.pdf"), 
	  width=7, height=7)
par(mar=c(4.1,3.1,4.1,1.1))
plot_cjoint(results = results.choice.psdb)
dev.off()

#Figure 4 (b)
pdf(paste0(dir, "paper_drafts/main/fig4b.pdf"), 
	  width=7, height=7)
par(mar=c(4.1,3.1,4.1,1.1))
plot_cjoint(results = results.choice.pt)
dev.off()

nrow(conjointff.psdb)
nrow(conjointff.pt)
length(unique(conjointff$respID))

##################################################################### 
# TABLE 5.

orgs_data <- orgs_data %>% mutate(pt = ifelse(party_director == "PARTIDO DOS TRABALHADORES", 1, 0), 
                                  pt_mayor = ifelse(party_mayor == "PT", 1, 0), 
                                  otherparty = ifelse((party_director != "PARTIDO DOS TRABALHADORES" & !is.na(party_director)), 1, 0),
                                  noparty = ifelse(is.na(party_director), 1, 0),
                                  party = ifelse(!is.na(party_director), 1, 0))

table5a <- orgs_data %>% group_by(pt, pt_mayor, noparty) %>% summarise(empenhado_def = mean(d_empenhado, na.rm = TRUE))
table5b <- orgs_data %>% group_by(pt, pt_mayor, noparty) %>% summarise(number_orgs = n_distinct(id_org))
table5a <- matrix(table5a[c(4, 3, 2, 1, 6, 5), 4]$empenhado_def, nrow = 3, ncol = 2, byrow = T)
table5b <- matrix(table5b[c(4, 3, 2, 1, 6, 5), 4]$number_orgs, nrow = 3, ncol = 2, byrow = T)
table5 <- matrix(c(table5a[,1], table5b[,1], table5a[,2], table5b[,2]), 3, 4)
xtable(table5)
